* CGK_Income.do: Constructs original income data series from the raw CEX files. It corrects for imputation that starts in 2004Q1 and for bracketing that starts in 2001Q1.
*
* Coibion-Gorodnichenko-Kueng; Initial version: March 2012; this version: May 2017

* Session-wide settings used by every section below.
*   savetype : name of the save command applied to each output file
*   data     : root folder of the CEX build files (machine-specific absolute path)
*   home     : alias pointing at the same folder as data
global savetype "saveold"
global data "C:\Users\nfumi\Desktop\HANKempirical\InnocentBystandersreplication_folder\source_files\build files for CEX"
global home "$data"


			********
			* MEMB * (MEMBer characteristics and income)
			********

	*** Construct aggregated MEMB variables (to be combined with FMLY income variables) ***

* Load the member-level file. The F-prefixed variables used below are the
* income and tax aggregates (suffix r = raw, b = bracketed, m = imputed, imp = imputation flag).
use "$data/stata/MEMBnew.dta", clear

* Build one combined series per income component: start from the raw value and
* fall back on the bracketed value whenever the raw value is missing or zero
* while the bracketed value is neither missing nor zero.
* (Note: This seems to be the best way of dealing with sample breaks.)
foreach stub in FSALARYX FNONFRMX FFRMINCX FRRETIRX FSSIX {
	gen       `stub'rb = `stub'r
	replace   `stub'rb = `stub'b if (`stub'b!=. & `stub'b!=0) & (`stub'r==. | `stub'r==0)
	label var `stub'rb "raw and bracketed values"
}

* Imputation indicators: 1 whenever the corresponding imp variable is positive.
* The summary below reports the fraction of invalid blanks to be imputed.
* NOTE(review): Stata treats missing as larger than any number, so a missing imp value
* also produces an indicator of 1 - confirm that this is the intended treatment.
foreach stub in FSALARYX FNONFRMX FFRMINCX FRRETIRX FSSIX FAMTFEDX FSLTAXX {
	gen `stub'impI = (`stub'imp>0)
}
sum *impI

* Variable labels for the raw, bracketed and imputed series ...
foreach stub in FSALARYX FNONFRMX FFRMINCX FRRETIRX FSSIX {
	label var `stub'r "raw series: aggregated from MEMB files"
	label var `stub'b "bracketed series: aggregated from MEMB files"
	label var `stub'm "imputed series: aggregated from MEMB files"
}
* ... and for the imputation indicators created above.
foreach stub in FSALARYX FNONFRMX FFRMINCX FRRETIRX FSSIX FAMTFEDX FSLTAXX {
	label var `stub'impI "imputation indicator: aggregated from MEMB files"
}

* Reduce to one record per household-interview and store the aggregated file.
label data "Aggregated MEMB variables for Coibion-Gorodnichenko-Kueng"
keep NEWIDunique yq FSALARY* FNONFRM* FFRMINC* FRRETIR* FSSI* FAMTFED* FSLTAXX* // family income aggregates
duplicates drop NEWIDunique yq, force // keep one observation per household-interview
compress
sort  NEWIDunique
$savetype "$data/stata/MEMBaggregatednew.dta", replace
*/




	*** Construct individual MEMB variables (to be used for TAXSIM) ***

* Reload the member-level file and remove the family-level aggregates so that
* only the person-level variables remain.
use "$data/stata/MEMBnew.dta", clear
drop FSALARY* FNONFRM* FFRMINC* FRRETIR* FSSI* FAMTFED* FSLTAXX* // family income aggregates

* Combine raw and bracketed series. Each pair lists the raw variable first and its
* bracketed counterpart second; the bracketed value is used when it is non-missing
* and the raw value is missing or zero.
foreach pair in "SALARYX SALARYBX" "NONFARMX NONFRMBX" "FARMINCX FRMINCBX" "SOCRRX SOCRRBX" {
	local raw : word 1 of `pair'
	local brk : word 2 of `pair'
	gen       `raw'rb = `raw'
	replace   `raw'rb = `brk' if (`raw'==. | `raw'==0) & `brk'!=.
	label var `raw'rb "raw and bracketed values"
}

* Flag invalid blanks to be imputed: 1 when the value is missing.
* Income components with a bracketed counterpart are checked on the combined series ...
foreach stub in SALARYX NONFARMX FARMINCX SOCRRX {
	gen `stub'impI = (`stub'rb==.)
}
* ... the remaining variables are checked on the reported value itself.
foreach stub in SSIX ANFEDTX ANSLTX {
	gen `stub'impI = (`stub'==.)
}
* The range below relies on the seven indicators having been created consecutively.
sum SALARYXimpI-ANSLTXimpI

* Put the identifiers first and store the individual-level file.
label data "Individual MEMB variables for Coibion-Gorodnichenko-Kueng"
order NEWIDunique yq 
compress
sort  NEWIDunique
$savetype "$data/stata/MEMBindividualnew.dta", replace
*/









			*************************
			* Combine FMLY and MEMB * (FMLY = FaMiLY characteristics and income)
			*************************


* select FMLY variables of interest
use "$data/stata/FMLYnew.dta", clear
keep NEWIDunique intdate QINTRVMO QINTRVYR FINLWT21 fwt RESPSTAT ///
 UNEMPLXrb COMPENSXrb WELFAREXrb INTEARNX FININCXrb PENSIONXrb INCLOSSArb INCLOSSBrb OTHRINCXrb FOODSMPXrb INCCONTXrb FEDTAXX SLOCTAXX TAXPROPX FEDRFNDX SLRFUNDX MISCTAXX OTHRFNDX /// FMLY income and tax variables that are (more or less) consistent over time
 AGE_REF SEX_REF REF_RACE EDUC_REF INCWEEK1 FAM_SIZE PERSLT18 PERSOT64 NO_EARNR // FMLY covariates for imputation

* merge MEMB variables of interest
* (1:1 on NEWIDunique; only records present in both files are kept)
merge 1:1 NEWIDunique using "$data/stata/MEMBaggregatednew.dta", keepusing( ///
 FSALARYXrb   FNONFRMXrb   FFRMINCXrb   FRRETIRXrb   FSSIXr    FAMTFEDXr    FSLTAXXr    /// MEMB income and tax variables that are (more or less) consistent over time
 FSALARYXimpI FNONFRMXimpI FFRMINCXimpI FRRETIRXimpI FSSIXimpI FAMTFEDXimpI FSLTAXXimpI /// MEMB income and tax imputation indicators
 )
keep if _merge==3
drop _merge
drop if INCWEEK1==. // drops only one observation
order NEWIDunique intdate QINTRVMO QINTRVYR fwt FINLWT21 RESPSTAT ///
 FSALARYXrb FNONFRMXrb FFRMINCXrb FRRETIRXrb FSSIXr FAMTFEDXr FSLTAXXr /// MEMB income and tax variables
 UNEMPLXrb  COMPENSXrb WELFAREXrb INTEARNX FININCXrb PENSIONXrb INCLOSSArb INCLOSSBrb OTHRINCXrb FOODSMPXrb INCCONTXrb FEDTAXX SLOCTAXX TAXPROPX FEDRFNDX SLRFUNDX MISCTAXX OTHRFNDX /// FMLY income and tax variables
 AGE_REF SEX_REF REF_RACE EDUC_REF INCWEEK1 FAM_SIZE PERSLT18 PERSOT64 NO_EARNR // FMLY covariates for imputation

* keep first and 5th interview (since incomes in intermediate interviews are repetitions)
* The last digit of NEWIDunique is split off as the interview number and the remaining
* leading digits become the new NEWIDunique. Both are computed straight from NEWIDunique:
* passing the id through a helper variable of the default float type would round ids
* above 16,777,216 and thereby corrupt both the id and the interview number.
gen byte intno = mod(NEWIDunique, 10)
replace NEWIDunique = floor(NEWIDunique/10)
bysort NEWIDunique: egen minintno=min(intno)
keep if intno==minintno | intno==5
drop minintno

* deflate nominal variables
* (match the interview date to the monthly CPI file and rescale each variable by 100/cpi_u;
*  observations without a CPI match are dropped)
gen month=intdate
merge m:1 month using "$data/stata/CPI_unew.dta"
keep if _merge==3
drop _merge month
foreach var in ///
  FSALARYXrb FNONFRMXrb FFRMINCXrb FRRETIRXrb FSSIXr FAMTFEDXr FSLTAXXr /// MEMB income and tax variables
  UNEMPLXrb  COMPENSXrb WELFAREXrb INTEARNX FININCXrb PENSIONXrb INCLOSSArb INCLOSSBrb OTHRINCXrb FOODSMPXrb INCCONTXrb FEDTAXX SLOCTAXX TAXPROPX FEDRFNDX SLRFUNDX MISCTAXX OTHRFNDX { // FMLY income and tax variables
   replace `var'= `var'/cpi_u*100
}
drop cpi_u

* recode missing values for MEMB variables
* (a value is set to missing whenever its imputation indicator is non-zero)
* NOTE(review): FSSIXr is not recoded although FSSIXimpI is merged in - confirm this is intentional.
replace FSALARYXrb=. if FSALARYXimpI!=0
replace FNONFRMXrb=. if FNONFRMXimpI!=0
replace FFRMINCXrb=. if FFRMINCXimpI!=0
replace FRRETIRXrb=. if FRRETIRXimpI!=0
replace FAMTFEDXr =. if FAMTFEDXimpI!=0
replace FSLTAXXr  =. if FSLTAXXimpI !=0
sum *impI

sort intdate NEWIDunique
compress
label data "Income data (raw before imputation) for Coibion-Gorodnichenko-Kueng"
$savetype "$data/CGK_Incomenew.dta" ,replace
*/
